This layer is derived from EPA Beach Closure data. We use the number of days a beach is closed per year due to pathogens in the water as a proxy for the impact of pathogens in coastal waters. Data is provided at the beach level, aggregated to county and then again aggregated to the region level.
The highest observed pressure score was 9.5% in Rhode Island, indicating that, on average, a given beach there would be closed 9.5% of the year. This high result was largely driven by the Atlantic Beach Club beach being closed more than 80 days in 2006.
# Chunk options: 7 x 7 inch figures; suppress messages and warnings globally.
knitr::opts_chunk$set(fig.width = 7, fig.height = 7, message = FALSE, warning = FALSE)
source('~/github/ohi-northeast/src/R/common.R')
library(tidyverse)
library(plotly)

# Data for all states except New York
df <- read_csv('data/beach_actions_(advisories_and_closures).csv')

# The New York beaches dataset was downloaded on its own. Beaches in the
# Great Lakes and Finger Lakes regions must be filtered out, keeping only
# ocean-facing counties.
ny <- read_csv('data/beach_actions_(advisories_and_closures)_NY.csv') %>%
  filter(County %in% c('BRONX', 'QUEENS', 'KINGS', 'SUFFOLK', 'NASSAU',
                       'RICHMOND', 'WESTCHESTER')) # ocean counties
# Massachusetts state waters are split between the Gulf of Maine and
# Virginian regions, so beaches in Barnstable and Plymouth counties must be
# assigned to a region manually.
split <- df %>%
  filter(County %in% c('BARNSTABLE', 'PLYMOUTH')) %>%
  select(State, County, `Beach Name`) %>%
  unique()

nrow(split)
## [1] 197

# There are 197 beaches in these two counties that require manual matching.
# Using the BEACON map viewer, each beach in Plymouth and Barnstable
# counties was matched to either Region 4 or 5.
write.csv(split, file = 'data/ma_beaches.csv')
Now match all beaches to their region IDs (rgn_id).
# Lookup tables: manual beach-to-region matches for Barnstable/Plymouth
# beaches, and county-to-region assignments for the rest of Massachusetts.
mass_bch <- read_csv('data/ma_beaches_rgn_id.csv')[, 1:4]
mass_cnty <- read_csv('~/github/ohi-northeast/src/tables/MA_counties.csv')[, 2:4]
# Upper-case county names so they join cleanly against the EPA data.
mass_cnty$County <- toupper(mass_cnty$County)
# Barnstable & Plymouth beaches: rgn_id comes from the manual beach-level match.
df_pb <- df %>%
  filter(State == 'MA', County %in% c('BARNSTABLE', 'PLYMOUTH')) %>%
  left_join(mass_bch, by = c('State', 'County', 'Beach Name')) %>%
  select(State, County, Year, `Beach Name`, ActionStartDate, ActionEndDate,
         `ActionDuration Days`, rgn_id)
# Remaining Massachusetts beaches: rgn_id comes from the county-level lookup.
df_ma <- df %>%
  filter(State == 'MA', !County %in% c('BARNSTABLE', 'PLYMOUTH')) %>%
  left_join(mass_cnty, by = 'County') %>%
  select(State, County, Year, `Beach Name`, ActionStartDate, ActionEndDate,
         `ActionDuration Days`, rgn_id)
# Add a State column to rgn_data so non-MA records can be joined by state
# abbreviation.
rgn_data$State <- as.character(rgn_data$rgn_abrev)

# Match rgn_id to the non-Massachusetts data (New York rows added here).
df_rest <- df %>%
  rbind(ny) %>%
  filter(State != 'MA') %>%
  left_join(rgn_data, by = 'State') %>%
  select(State, County, Year, `Beach Name`, ActionStartDate, ActionEndDate,
         `ActionDuration Days`, rgn_id)
# Combine all three subsets and attach region names; drop the duplicated
# State column and the region abbreviation introduced by the join.
df_all <- rbind(df_pb, df_ma, df_rest) %>%
  left_join(rgn_data, by = 'rgn_id') %>%
  select(-State.y, -rgn_abrev)

library(trelliscopejs)
# Percent of days each beach is closed per year, then averaged up to the
# county level.
perc <- df_all %>%
  distinct() %>% # the raw data contains some duplicate records
  group_by(`Beach Name`, Year, County, rgn_id, rgn_name) %>%
  summarize(days_closed = sum(`ActionDuration Days`)) %>%
  mutate(perc_closed = days_closed / 365) %>%
  ungroup() %>%
  group_by(County, rgn_id, Year, rgn_name) %>%
  summarize(perc_closed = mean(perc_closed))
# Faceted (trelliscope) time series of county-level closure proportions,
# one panel per region.
ggplot(perc, aes(x = Year, y = perc_closed, color = `County`)) +
  geom_line() +
  trelliscopejs::facet_trelliscope(~rgn_name, self_contained = TRUE)

# The mean annual proportion a given beach is closed per year in each
# region is calculated and plotted below.
# Mean annual proportion (as a percent) any given beach is closed, by region.
st <- perc %>%
  group_by(rgn_name, rgn_id, Year) %>%
  summarize(m = mean(perc_closed) * 100)

g <- ggplot(st, aes(x = Year, y = m, color = rgn_name)) +
  geom_line() +
  labs(title = "Beach Closures (mean annual proportion any given beach is closed)",
       y = "Proportion (%) of year")

ggplotly(g)

# Scores are rescaled using the maximum value across regions, across all
# time. The reference value is 9.62%.
# Rescale to [0, 1] using the maximum value observed across all regions and
# all years. NOTE: `.$m` deliberately references the whole data frame rather
# than the grouped slice, so every region is scaled against the same
# reference point (a bare `max(m)` would be per-group here).
scores <- st %>%
  mutate(score = m / max(.$m))

# Each region is scored based on the mean annual proportion any given beach
# within a county is closed, aggregated to the regional level.
## Scores through time
time_plot <- ggplot(scores, aes(x = Year, y = score, col = rgn_name)) +
  geom_line() +
  # Title previously said "Chemicals" — a copy-paste error; this layer is
  # pathogens (see the output path below).
  labs(title = "Pressure Layer: Pathogens",
       x = "Year",
       y = "Pressure Score",
       colour = "Region")

ggplotly(time_plot)

# Map of the most recent year of scores.
map_scores(scores %>% filter(Year == 2016),
           scale_label = 'Pressure Score',
           map_title = "2016",
           rev_col = TRUE) # TRUE, not T: T is reassignable

# Final output layer. We'll need to gapfill over time.
out <- scores %>%
  select(rgn_name, rgn_id, year = Year, score)

#write.csv(out, file = 'scores/pathogens.csv')